{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RNN Sentiment Classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook, we use an RNN to classify IMDB movie reviews by their sentiment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/the-deep-learners/deep-learning-illustrated/blob/master/notebooks/rnn_sentiment_classifier.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import keras\n",
    "from keras.datasets import imdb\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout, Embedding, SpatialDropout1D\n",
    "from keras.layers import SimpleRNN # new! \n",
    "from keras.callbacks import ModelCheckpoint\n",
    "import os\n",
    "from sklearn.metrics import roc_auc_score \n",
    "import matplotlib.pyplot as plt \n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Set hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# where the per-epoch model weights are written:\n",
    "output_dir = 'model_output/rnn'\n",
    "\n",
    "# training schedule:\n",
    "epochs = 16  # way more epochs!\n",
    "batch_size = 128\n",
    "\n",
    "# vector-space embedding and input sequences:\n",
    "n_dim = 64\n",
    "n_unique_words = 10000\n",
    "max_review_length = 100  # lowered due to vanishing gradient over time\n",
    "pad_type = 'pre'  # passed to pad_sequences(padding=...)\n",
    "trunc_type = 'pre'  # passed to pad_sequences(truncating=...)\n",
    "drop_embed = 0.2\n",
    "\n",
    "# RNN layer architecture:\n",
    "n_rnn = 256\n",
    "drop_rnn = 0.2\n",
    "\n",
    "# dense layer architecture (disabled; only needed if a top dense layer is added to the model):\n",
    "# n_dense = 256\n",
    "# dropout = 0.2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# n_words_to_skip is no longer passed; only num_words is set here\n",
    "(x_train, y_train), (x_valid, y_valid) = imdb.load_data(\n",
    "    num_words=n_unique_words\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Preprocess data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bring every review to max_review_length tokens, filling with 0 where needed.\n",
    "x_train = pad_sequences(\n",
    "    x_train,\n",
    "    maxlen=max_review_length,\n",
    "    padding=pad_type,\n",
    "    truncating=trunc_type,\n",
    "    value=0,\n",
    ")\n",
    "x_valid = pad_sequences(\n",
    "    x_valid,\n",
    "    maxlen=max_review_length,\n",
    "    padding=pad_type,\n",
    "    truncating=trunc_type,\n",
    "    value=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Design neural network architecture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Embedding -> spatial dropout -> one SimpleRNN layer -> single sigmoid output unit.\n",
    "model = Sequential([\n",
    "    Embedding(n_unique_words, n_dim, input_length=max_review_length),\n",
    "    SpatialDropout1D(drop_embed),\n",
    "    SimpleRNN(n_rnn, dropout=drop_rnn),\n",
    "    # A top dense block (Dense(n_dense, activation='relu') followed by Dropout(dropout))\n",
    "    # is left disabled: a top dense layer is typically not used in NLP models.\n",
    "    Dense(1, activation='sigmoid'),\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_1 (Embedding)      (None, 100, 64)           640000    \n",
      "_________________________________________________________________\n",
      "spatial_dropout1d_1 (Spatial (None, 100, 64)           0         \n",
      "_________________________________________________________________\n",
      "simple_rnn_1 (SimpleRNN)     (None, 256)               82176     \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1)                 257       \n",
      "=================================================================\n",
      "Total params: 722,433\n",
      "Trainable params: 722,433\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# layer-by-layer output shapes and parameter counts\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Configure model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# binary cross-entropy loss, Adam optimizer, accuracy reported during training\n",
    "model.compile(\n",
    "    loss='binary_crossentropy',\n",
    "    optimizer='adam',\n",
    "    metrics=['accuracy'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output directory up front; exist_ok=True replaces the separate\n",
    "# os.path.exists() check and is safe when the directory already exists.\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "# Checkpoint callback: the {epoch:02d} placeholder in the file name is filled in\n",
    "# with the zero-padded epoch number (e.g. weights.07.hdf5).\n",
    "modelcheckpoint = ModelCheckpoint(filepath=output_dir+\"/weights.{epoch:02d}.hdf5\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Train!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 25000 samples, validate on 25000 samples\n",
      "Epoch 1/16\n",
      "25000/25000 [==============================] - 18s 709us/step - loss: 0.7023 - acc: 0.5132 - val_loss: 0.6993 - val_acc: 0.5311\n",
      "Epoch 2/16\n",
      "25000/25000 [==============================] - 17s 689us/step - loss: 0.6865 - acc: 0.5422 - val_loss: 0.6649 - val_acc: 0.5922\n",
      "Epoch 3/16\n",
      "25000/25000 [==============================] - 17s 682us/step - loss: 0.5905 - acc: 0.6776 - val_loss: 0.5816 - val_acc: 0.6835\n",
      "Epoch 4/16\n",
      "25000/25000 [==============================] - 17s 682us/step - loss: 0.5716 - acc: 0.6966 - val_loss: 0.5647 - val_acc: 0.7055\n",
      "Epoch 5/16\n",
      "25000/25000 [==============================] - 17s 680us/step - loss: 0.5848 - acc: 0.6982 - val_loss: 0.5329 - val_acc: 0.7458\n",
      "Epoch 6/16\n",
      "25000/25000 [==============================] - 17s 678us/step - loss: 0.5263 - acc: 0.7399 - val_loss: 0.6334 - val_acc: 0.6453\n",
      "Epoch 7/16\n",
      "25000/25000 [==============================] - 17s 681us/step - loss: 0.4484 - acc: 0.7962 - val_loss: 0.5039 - val_acc: 0.7758\n",
      "Epoch 8/16\n",
      "25000/25000 [==============================] - 17s 678us/step - loss: 0.5540 - acc: 0.7086 - val_loss: 0.6503 - val_acc: 0.6119\n",
      "Epoch 9/16\n",
      "25000/25000 [==============================] - 17s 677us/step - loss: 0.5298 - acc: 0.7416 - val_loss: 0.6444 - val_acc: 0.7170\n",
      "Epoch 10/16\n",
      "25000/25000 [==============================] - 17s 675us/step - loss: 0.5070 - acc: 0.7624 - val_loss: 0.5628 - val_acc: 0.7406\n",
      "Epoch 11/16\n",
      "25000/25000 [==============================] - 17s 675us/step - loss: 0.4657 - acc: 0.7842 - val_loss: 0.5673 - val_acc: 0.7324\n",
      "Epoch 12/16\n",
      "25000/25000 [==============================] - 17s 672us/step - loss: 0.4607 - acc: 0.7895 - val_loss: 0.6675 - val_acc: 0.5698\n",
      "Epoch 13/16\n",
      "25000/25000 [==============================] - 17s 671us/step - loss: 0.4289 - acc: 0.8077 - val_loss: 0.5532 - val_acc: 0.7346\n",
      "Epoch 14/16\n",
      "25000/25000 [==============================] - 17s 673us/step - loss: 0.6410 - acc: 0.6558 - val_loss: 0.7551 - val_acc: 0.6234\n",
      "Epoch 15/16\n",
      "25000/25000 [==============================] - 17s 675us/step - loss: 0.5058 - acc: 0.7543 - val_loss: 0.5916 - val_acc: 0.7038\n",
      "Epoch 16/16\n",
      "25000/25000 [==============================] - 17s 672us/step - loss: 0.4012 - acc: 0.8309 - val_loss: 0.5920 - val_acc: 0.7510\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7face1628ac8>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Train on the padded reviews, validating on the held-out split each epoch;\n",
    "# the checkpoint callback writes weights into output_dir.\n",
    "model.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    batch_size=batch_size,\n",
    "    epochs=epochs,\n",
    "    verbose=1,\n",
    "    validation_data=(x_valid, y_valid),\n",
    "    callbacks=[modelcheckpoint],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Epoch 7 had the lowest validation loss (0.5039) and the highest validation accuracy\n",
    "# (0.7758) in the recorded training run; update best_epoch if a re-run peaks elsewhere.\n",
    "best_epoch = 7\n",
    "model.load_weights(output_dir + \"/weights.{:02d}.hdf5\".format(best_epoch))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict() returns the sigmoid outputs directly; predict_proba() was a deprecated\n",
    "# Sequential-only wrapper around it and is not available in recent Keras releases.\n",
    "y_hat = model.predict(x_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAE65JREFUeJzt3XGsXvV93/H3JxDStU1iAwYh28xEdbPQSiHsChxF6tKQGQMV5o8wOVuHi6x56mjVbtVWZ/vDHTQT2bSlQ2vpvOLVRG0IZcuwElbmOaBsU00whdIARXYIBcsMuxicdSjpSL/74/mZXMy177nX9z5P7N/7JT16zvme33nO78e98uee3znPIVWFJKk/75h0ByRJk2EASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjp19qQ7cDLnn39+rVq1atLdkN7uW8+O3t/z/sn2Q5rBY4899mdVtWy2dt/XAbBq1Sr27t076W5Ib/ffPzp6//jDk+yFNKMkfzqknVNAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVOzBkCS9yd5YtrrW0l+Mcm5SXYl2dfel7b2SXJHkv1Jnkxy+bTP2tja70uycTEHJkk6uVkDoKqerarLquoy4K8DrwNfBLYAu6tqNbC7rQNcA6xur83AnQBJzgW2AlcCVwBbj4WGJGn85joFdBXwjar6U2A9sKPVdwA3tOX1wN01sgdYkuQi4GpgV1UdqapXgV3AulMegSRpXub6TeANwOfb8oVV9RJAVb2U5IJWXw68OG2fA612ovpbJNnM6MyBiy++eI7de6tVW758SvvP1/O3XzeR40rSXAw+A0hyDnA98HuzNZ2hViepv7VQta2qpqpqatmyWR9lIUmap7lMAV0D/GFVvdzWX25TO7T3Q61+AFg5bb8VwMGT1CVJEzCXAPgk35v+AdgJHLuTZyNw/7T6Te1uoDXA0TZV9CCwNsnSdvF3batJkiZg0DWAJD8I/E3g708r3w7cm2QT8AJwY6s/AFwL7Gd0x9DNAFV1JMltwKOt3a1VdeSURyBJmpdBAVBVrwPnHVd7hdFdQce3LeCWE3zOdmD73LspSVpofhNYkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdGhQASZYkuS/JnyR5JsmHk5ybZFeSfe19aWubJHck2Z/kySSXT/ucja39viQbF2tQkqTZDT0D+LfA71fVXwM+CDwDbAF2V9VqYHdbB7gGWN1em4E7AZKcC2wFrgSuALYeCw1J0vjNGgBJ3gP8BHAXQFX9RVW9BqwHdrRmO4Ab2vJ64O4a2QMsSXIRcDWwq6qOVNWrwC5g3YKORpI02JAzgPcBh4H/mOTxJL+V5IeAC6vqJYD2fkFrvxx4cdr+B1rtRHVJ0gQMCYCzgcuBO6vqQ8D/5XvTPTPJDLU6Sf2tOyebk+xNsvfw4cMDuidJmo8hAXAAOFBVj7T1+xgFwsttaof2fmha+5XT9l8BHDxJ/S2qaltVTVXV1LJly+YyFknSHMwaAFX1v4EXk7y/la4CngZ2Asfu5NkI3N+WdwI3tbuB1gBH2xTRg8DaJEvbxd+1rSZJmoCzB7b7eeB3kpwDPAfczCg87k2yCXgBuLG1fQC4FtgPvN7aUlVHktwGPNra3VpVRxZkFJKkORsUAFX1BDA1w6arZmhbwC0n+JztwPa5dFCStDj8JrAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHVq6KMgJKk7q7Z8eWLHfv726xb9GJ4BSFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnRoUAEmeT/LHSZ5I
srfVzk2yK8m+9r601ZPkjiT7kzyZ5PJpn7Oxtd+XZOPiDEmSNMRczgB+sqouq6qptr4F2F1Vq4HdbR3gGmB1e20G7oRRYABbgSuBK4Ctx0JDkjR+pzIFtB7Y0ZZ3ADdMq99dI3uAJUkuAq4GdlXVkap6FdgFrDuF40uSTsHQACjgvyV5LMnmVruwql4CaO8XtPpy4MVp+x5otRPV3yLJ5iR7k+w9fPjw8JFIkuZk6P8P4CNVdTDJBcCuJH9ykraZoVYnqb+1ULUN2AYwNTX1tu2SpIUx6Aygqg6290PAFxnN4b/cpnZo74da8wPAymm7rwAOnqQuSZqAWQMgyQ8lefexZWAt8HVgJ3DsTp6NwP1teSdwU7sbaA1wtE0RPQisTbK0Xfxd22qSpAkYMgV0IfDFJMfa/25V/X6SR4F7k2wCXgBubO0fAK4F9gOvAzcDVNWRJLcBj7Z2t1bVkQUbiSRpTmYNgKp6DvjgDPVXgKtmqBdwywk+azuwfe7dlCQtNL8JLEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkTg0OgCRnJXk8yZfa+iVJHkmyL8kXkpzT6u9q6/vb9lXTPuNTrf5skqsXejCSpOHmcgbwC8Az09Y/A3y2qlYDrwKbWn0T8GpV/Qjw2daOJJcCG4AfA9YBv5HkrFPrviRpvgYFQJIVwHXAb7X1AB8D7mtNdgA3tOX1bZ22/arWfj1wT1V9p6q+CewHrliIQUiS5m7oGcCvAf8E+Mu2fh7wWlW90dYPAMvb8nLgRYC2/Whr/2Z9hn0kSWM2awAk+SngUFU9Nr08Q9OaZdvJ9pl+vM1J9ibZe/jw4dm6J0mapyFnAB8Brk/yPHAPo6mfXwOWJDm7tVkBHGzLB4CVAG37e4Ej0+sz7POmqtpWVVNVNbVs2bI5D0iSNMysAVBVn6qqFVW1itFF3K9U1d8BHgI+0ZptBO5vyzvbOm37V6qqWn1Du0voEmA18LUFG4kkaU7Onr3JCf0ycE+SXwUeB+5q9buAzyXZz+gv/w0AVfVUknuBp4E3gFuq6runcHxJ0imYUwBU1cPAw235OWa4i6eqvg3ceIL9Pw18eq6dlCQtPL8JLEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktSpWQMgyQ8k+VqSP0ryVJJ/3uqXJHkkyb4kX0hyTqu/q63vb9tXTfusT7X6s0muXqxBSZJmN+QM4DvAx6rqg8BlwLoka4DPAJ+tqtXAq8Cm1n4T8GpV/Qjw2daOJJcCG4AfA9YBv5HkrIUcjCRpuLNna1BVBfx5W31nexXwMeBvt/oO4FeAO4H1bRngPuDfJUmr31NV3wG+mWQ/cAXwBwsxkO8nq7Z8eSLHff726yZyXEmnp0HXAJKcleQJ4BCwC/gG8FpVvdGaHACWt+XlwIsAbftR4Lzp9Rn2mX6szUn2Jtl7+PDhuY9IkjTIoACoqu9W1WXACkZ/tX9gpmbtPSfYdqL68cfaVlVTVTW1bNmyId2TJM3DnO4CqqrXgIeBNcCSJMemkFYAB9vyAWAlQNv+XuDI9PoM+0iSxmzIXUDLkixpy38F+DjwDPAQ8InWbCNwf1ve2dZp27/SriPsBDa0u4QuAVYDX1uogUiS5mbWi8DARcCOdsfOO4B7q+pLSZ4G7knyq8DjwF2t/V3A59pF3iOM7vyhqp5Kci/wNPAGcEtVfXdhhyNJGmrIXUBPAh+aof4co+sBx9e/Ddx4gs/6NPDpuXdTkrTQ/CawJHXKAJCkTg25BiBJEzWpL1ee6TwDkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSp
UwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE7NGgBJViZ5KMkzSZ5K8gutfm6SXUn2tfelrZ4kdyTZn+TJJJdP+6yNrf2+JBsXb1iSpNkMOQN4A/ilqvoAsAa4JcmlwBZgd1WtBna3dYBrgNXttRm4E0aBAWwFrgSuALYeCw1J0vjNGgBV9VJV/WFb/j/AM8ByYD2wozXbAdzQltcDd9fIHmBJkouAq4FdVXWkql4FdgHrFnQ0kqTB5nQNIMkq4EPAI8CFVfUSjEICuKA1Ww68OG23A612orokaQIGB0CSHwb+E/CLVfWtkzWdoVYnqR9/nM1J9ibZe/jw4aHdkyTN0aAASPJORv/4/05V/edWfrlN7dDeD7X6AWDltN1XAAdPUn+LqtpWVVNVNbVs2bK5jEWSNAdD7gIKcBfwTFX9m2mbdgLH7uTZCNw/rX5TuxtoDXC0TRE9CKxNsrRd/F3bapKkCTh7QJuPAH8X+OMkT7TaPwVuB+5Nsgl4AbixbXsAuBbYD7wO3AxQVUeS3AY82trdWlVHFmQUkqQ5mzUAqup/MvP8PcBVM7Qv4JYTfNZ2YPtcOihJWhx+E1iSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASerUkIfB6TSxasuXJ3bs52+/bmLHljQ/ngFIUqcMAEnqlFNAkgab5DSjFp5nAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTswZAku1JDiX5+rTauUl2JdnX3pe2epLckWR/kieTXD5tn42t/b4kGxdnOJKkoYacAfw2sO642hZgd1WtBna3dYBrgNXttRm4E0aBAWwFrgSuALYeCw1J0mTM+kWwqvpqklXHldcDH23LO4CHgV9u9burqoA9SZYkuai13VVVRwCS7GIUKp8/5RHo+8KkviDkM4ik+ZvvN4EvrKqXAKrqpSQXtPpy4MVp7Q602onqb5NkM6OzBy6++OJ5dk86c/ltXC2Uhb4InBlqdZL624tV26pqqqqmli1btqCdkyR9z3zPAF5OclH76/8i4FCrHwBWTmu3AjjY6h89rv7wPI8tvWlSfw3f875XWPO+8yZybGmhzDcAdgIbgdvb+/3T6j+X5B5GF3yPtpB4EPgX0y78rgU+Nf9uS5O357lX2OB0jE5jswZAks8z+uv9/CQHGN3Ncztwb5JNwAvAja35A8C1wH7gdeBmgKo6kuQ24NHW7tZjF4QlSZMx5C6gT55g01UztC3glhN8znZg+5x6J0laNH4TWJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnRp7ACRZl+TZJPuTbBn38SVJI2MNgCRnAb8OXANcCnwyyaXj7IMkaWTcZwBXAPur6rmq+gvgHmD9mPsgSWL8AbAceHHa+oFWkySN2dljPl5mqNVbGiSbgc1t9c+TPDvws88H/uwU+nY6c+xj9uE3l35q3Ic+xp/5GS6fmbE8dOx/dcgxxh0AB4CV09ZXAAenN6iqbcC2uX5wkr1VNXVq3Ts9Ofb+xt7ruMGxL+TYxz0F9CiwOsklSc4BNgA7x9wHSRJjPgOoqjeS/BzwIHAWsL2qnhpnHyRJI+OeAqKqHgAeWISPnvO00RnEsfen13GDY18wqarZW0mSzjg+CkKSOnXaBcBsj5JI8q4kX2jbH0myavy9XBwDxv6Pkjyd5Mkku5MMuhXsdDD0ESJJPpGkkpwRd4kMGXeSv9V+7k8l+d1x93GxDPh9vzjJQ0keb7/z106inwstyfYkh5J8/QTbk+SO9t/lySSXz/tgVXXavBhdOP4G8D7gHOCPgEuPa/MPgN9syxuAL0y632Mc+08CP9iWf7ansbd27wa+CuwBpibd7zH9zFcDjwNL
2/oFk+73GMe+DfjZtnwp8Pyk+71AY/8J4HLg6yfYfi3wXxl9r2oN8Mh8j3W6nQEMeZTEemBHW74PuCrJTF9AO93MOvaqeqiqXm+rexh9z+JMMPQRIrcB/xL49jg7t4iGjPvvAb9eVa8CVNWhMfdxsQwZewHvacvv5bjvFJ2uquqrwJGTNFkP3F0je4AlSS6az7FOtwAY8iiJN9tU1RvAUeC8sfRucc31MRqbGP2VcCaYdexJPgSsrKovjbNji2zIz/xHgR9N8r+S7Emybmy9W1xDxv4rwE8nOcDozsKfH0/XJm7BHqkz9ttAT9Gsj5IY2OZ0NHhcSX4amAL+xqL2aHxOOvYk7wA+C/zMuDo0JkN+5mczmgb6KKMzvv+R5Mer6rVF7ttiGzL2TwK/XVX/OsmHgc+1sf/l4ndvohbs37jT7Qxg1kdJTG+T5GxGp4YnO506XQwZO0k+Dvwz4Pqq+s6Y+rbYZhv7u4EfBx5O8jyjedGdZ8CF4KG/7/dX1f+rqm8CzzIKhNPdkLFvAu4FqKo/AH6A0bNyznSD/i0Y4nQLgCGPktgJbGzLnwC+Uu3KyWlu1rG3aZB/z+gf/zNlLhhmGXtVHa2q86tqVVWtYnT94/qq2juZ7i6YIb/v/4XRxX+SnM9oSui5sfZycQwZ+wvAVQBJPsAoAA6PtZeTsRO4qd0NtAY4WlUvzeeDTqspoDrBoySS3ArsraqdwF2MTgX3M/rLf8PkerxwBo79XwE/DPxeu+79QlVdP7FOL5CBYz/jDBz3g8DaJE8D3wX+cVW9MrleL4yBY/8l4D8k+YeMpkB+5kz4Yy/J5xlN6Z3frm9sBd4JUFW/yeh6x7XAfuB14OZ5H+sM+O8lSZqH020KSJK0QAwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI69f8BVUmyO8/txVUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7face0b95d30>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of the predicted values, with a reference line at 0.5.\n",
    "plt.hist(y_hat)\n",
    "plt.axvline(x=0.5, color='orange');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'84.94'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ROC AUC on the validation set, shown as a percentage with two decimals.\n",
    "auc_percent = roc_auc_score(y_valid, y_hat) * 100.0\n",
    "\"{:0.2f}\".format(auc_percent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
